
require(tidyverse)
require(estimatr)
require(Synth)
require(scpi)



####################################################################################################
####################################################################################################
#
#         Containing Large-Scale Criminal Violence through Internationalized Prosecution: 
#              How the Collaboration between the CICIG and Guatemala’s Law Enforcement
#                  Contributed to a Sustained Reduction in the Murder Rate
#         
#                                             - main text -
#
#                             Guillermo Trejo                   Camilo Nieto-Matiz
#                               gtrejo@nd.edu                 camilo.nieto-matiz@utsa.edu
#                 
####################################################################################################
####################################################################################################


#load data
load("guatemala data.rda")


#========================================================= 
#                        Figure 5
#         Homicide Rates in Guatemala (Treated) and 
#              Synthetic Guatemala, 2002–2019
#========================================================= 


# create matrix for GUATEMALA
dataprep.out<-
  dataprep(
    foo = dat,
    predictors = c("loggdp", "logimr", "growth", "ginimkt", "schooling2"),
    predictors.op = "mean",
    dependent = "homrates_oms",
    unit.variable = "id",
    time.variable = "year",
    special.predictors = list(
      list("homrates_oms", c(2005:2007), c("mean"))),
    treatment.identifier = "Guatemala",
    controls.identifier = c("Honduras", "Panama",  "Venezuela", "Costa Rica", 
                            "Dominican Republic", "Nicaragua", "Bolivia"),
    time.predictors.prior = c(2003:2007),
    time.optimize.ssr = c(2003:2007),
    unit.names.variable = "country",
    time.plot = c(2002:2019)
  )



## Synthetic values
synth.out <- synth(data.prep.obj=dataprep.out
);synth.out




## Summary 
synth.tables <- synth.tab(
  dataprep.res = dataprep.out,
  synth.res = synth.out)
print(synth.tables, digits = 6)



## save estimates in tibble
synth_plot <-as_tibble(cbind(dataprep.out$Y1plot, dataprep.out$Y0plot%*%synth.out$solution.w, 2002:2019)) %>% 
  dplyr::rename(synth_g=w.weight, hom_g=`10`, year=V3) %>%
  pivot_longer(cols=c(hom_g, synth_g),
               names_to = "Variable",
               values_to= "Value")


# plot
ggplot(synth_plot, aes(y=Value, x=year, color=Variable, linetype=Variable))+
  geom_line(size=1) +
  ylim(10,80) + theme_bw() +
  labs(y="Homicide rates", x="") +
  scale_color_manual(values=c("black", "blue"),labels=c("Guatemala","Synthetic Guatemala")) +
  scale_linetype_manual(values = c(1,2),labels=c("Guatemala","Synthetic Guatemala")) +
  scale_x_continuous(expand = c(0, 0), breaks = c(2002, 2004,2006, 2008, 2010, 2012,2014,2016,2018)) +
  geom_vline(xintercept=2008, color="red", linetype=2)+
  theme(legend.position = c(0.2,0.15), 
        legend.title = element_blank(),
        legend.background = element_rect(fill = "transparent"),
        axis.text.x=element_text(size=rel(0.8)))
 





#=============================================================== 
#                        Figure 6
#   Permutation Test: Homicide Gaps in Guatemala vs. All Countries
#===============================================================



#create new matrix to store values
dat_pt <- dat %>%
  filter(country %in% c("Guatemala","Honduras", "Panama",  "Venezuela","Bolivia",      
                        "Dominican Republic", "Nicaragua", "Costa Rica")) %>%
  transform(id=as.numeric(factor(id))) 

store.gu <- matrix(NA,length(2002:2019),length(unique(dat_pt$country)))
colnames(store.gu) <- unique(dat_pt$country)


# run placebo test
for(iter in 1:length(unique(dat_pt$country)))
{
  dataprep.gu<-
    dataprep(
      foo = dat_pt,
      predictors = c("loggdp", "logimr", "growth", "ginimkt", "schooling2"),
      predictors.op = "mean",
      dependent = "homrates_oms",
      unit.variable = "id",
      time.variable = "year",
      special.predictors = list(
        list("homrates_oms", c(2005:2007), c("mean"))),
      treatment.identifier = iter,
      controls.identifier = c(1:(length(unique(dat_pt$country))-1))[-iter],
      time.predictors.prior = c(2003:2007),
      time.optimize.ssr = c(2003:2007),
      unit.names.variable = "country",
      time.plot = c(2002:2019)
    )
  
  
  # run synth
  synth.out.gu <- synth(
    data.prep.obj = dataprep.gu,
    method = "BFGS"
  )
  
  # store gaps
  store.gu[,iter] <- dataprep.gu$Y1plot - (dataprep.gu$Y0plot %*% synth.out.gu$solution.w)
}


# convert to long format 
perm.dat <-   as.data.frame(store.gu)  %>% 
  mutate(year=2002:2019) %>% 
  pivot_longer(cols=c("Bolivia":"Venezuela"),
               names_to='Country',
               values_to='Values') %>% 
  mutate(treat=ifelse(Country=="Guatemala","Guatemala","Control countries")) 


# plot
perm.dat %>%
  filter(!Country %in% c( 'Honduras'))%>% #countries with pre-cicig MSPE 5 times larger that Guatemala's
  ggplot(aes(year, Values, group=Country, size=treat, color=treat))+
  geom_line()  + 
  scale_color_manual(values=c("gray80", "blue"),labels=c("Other countries","Guatemala")) +
  scale_size_manual(values=c(.5,1.2),labels=c("Other countries","Guatemala"))+
  labs(x="", y="Homicide rates gaps") +
  geom_hline(yintercept = 0, linetype=1, color="gray30")+
  geom_vline(xintercept = 2008, linetype=2, color="red")+
  theme_bw()+
  theme(legend.position = c(0.2,0.15), 
        legend.title = element_blank(),
        legend.background = element_rect(fill = "transparent"),
        axis.text.x=element_text(size=rel(0.8)))+
  ylim(-45,50) + scale_x_continuous(expand = c(0, 0), breaks=seq(2002,2018,2)) 







#=============================================================== 
#                        Figure 7
#          Ratio of post-CICIG RMSPE to pre-CICIG RMSPE
#===============================================================

# compute ratio of post-CICIG RMSPE to pre-CICIG RMSPE                                                  
rmse <- function(x){sqrt(mean(x^2))}
rmspe <- data.frame(rmspe = apply(store.gu[7:18,],2,rmse)/apply(store.gu[1:6,],2,rmse),
                    country = colnames(store.gu)) %>%
  mutate(country=fct_reorder(country,rmspe))


#plot
ggplot(rmspe) +
  geom_point(aes(y=country, x=rmspe), color="blue", size=2) + 
  theme_bw() + labs(y="",x="Post-CICIG to Pre-CICIG RMSPE Ratio") +
  geom_vline(xintercept=median(rmspe$rmspe), color="red", linetype="dashed")









#=============================================================== 
#                        Figure 8
#          Prediction Intervals for the Homicide Rate
#===============================================================

### See "CPS prediction intervals - internationalized prosecution and criminal violence.R" 
### for all code pertaining to prediction intervals.




#=============================================================== 
#                        Figure 9
#       Estimated Number of Lives Saved by IP in Guatemala
#===============================================================

# create matrix for GUATEMALA
dataprep.out<-
  dataprep(
    foo = dat,
    predictors = c("loggdp", "logimr", "growth", "ginimkt", "schooling2"),
    predictors.op = "mean",
    dependent = "homrates_oms",
    unit.variable = "id",
    time.variable = "year",
    special.predictors = list(
      list("homrates_oms", c(2005:2007), c("mean"))),
    treatment.identifier = "Guatemala",
    controls.identifier = c("Honduras", "Panama",  "Venezuela", "Costa Rica", 
                            "Dominican Republic", "Nicaragua", "Bolivia"),
    time.predictors.prior = c(2003:2007),
    time.optimize.ssr = c(2003:2007),
    unit.names.variable = "country",
    time.plot = c(2002:2019)
  )



## Synthetic values
synth.out <- synth(data.prep.obj=dataprep.out
);synth.out

 
## extract treated unit values and synthetic values  
dat1 <- data.frame(dataprep.out$Y0plot%*%synth.out$solution.w, dataprep.out$Y1plot) 
dat1 <- dat1 %>% dplyr::rename(synthetic=w.weight, values=X10) %>%
  mutate(year=2002:2019)


# merge with population data
values <- dat %>% 
  filter(country=="Guatemala"&year>=2002&year<=2019) %>% mutate(pop=exp(logpop)) %>%
  select(year, pop) %>%
  left_join(dat1, by='year') %>%
  mutate(gaps=synthetic-values,         # calculate difference between synthetic and real rates
         values_pop=gaps/100000*pop,    # normalize gaps by population
         dd=11.06,                      # diff in diff estimate (appendix C.6)
         dd_pop=dd/100000*pop)          # transform dd estimate and normalize by population 


# how many homicides have been prevented?
values %>%
  filter(year>=2008&year<=2019) %>%     # between 2008 and 2019
  summarise(sum_sl = sum(values_pop),   # sum 
            mean_sl = mean(values_pop), # yearly average
            dd_pop=sum(dd_pop))         # sum (diff in diff estimate)


# plot
ggplot(values, aes(factor(year), values_pop))+
  geom_bar(fill="slategrey",stat="identity")+
  theme_bw()+xlab('Year')+theme(axis.text.x = element_text(angle = 40))+
  labs(y='Number of Homicides', x='')+theme(plot.title = element_text(hjust = 0.5))+
  scale_y_continuous(breaks=seq(0,6000,1000))+
  geom_vline(xintercept=6.5, color="red", linetype=2)






#=============================================================== 
#                        Figure 10
#   HR Violations in Guatemala and Synthetic Guatemala, 2002–2019
#===============================================================


### See "CPS prediction intervals - internationalized prosecution and criminal violence.R" 
### for all code pertaining to prediction intervals.


